Podsumowanie analizy

Wykorzystane biblioteki

library(dplyr)
library(ggplot2)
library(ggforce)
library(gganimate)
library(tidyr)
library(caret)
library(DT)
library(summarytools)
library(gifski)
library(png)

Dane

Powtarzalność wyników

Aby zapewnić powtarzalność wyników, ustawiamy ziarno generatora liczb losowych.

# Fix the random number generator state so that every run of the report
# produces the same random draws.
set.seed(seed = 123)

Wczytywanie danych

Dane rozdzielone są znakiem “;”, dlatego do ich wczytania używamy funkcji read.csv2 (wczytujemy pierwszych 1000 wierszy). Kolumna title jest połączeniem kolumn pdb_code, res_name, res_id oraz chain_id, zatem możemy ją usunąć podczas wczytywania, aby uniknąć powtarzania informacji; w kodzie usuwany jest cały zakres kolumn od blob_coverage do title.

# Read the first 1000 rows of the semicolon-separated file (values use "." as
# the decimal mark, strings stay character) and drop the column range that
# runs from blob_coverage through title.
all_data <- select(
  read.csv2(
    "all_summary.csv",
    header = TRUE,
    dec = ".",
    nrows = 1000,
    stringsAsFactors = FALSE
  ),
  -(blob_coverage:title)
)

Wstępne

Wiersze, które w kolumnie “res_name” zawierają niepożądaną przez nas wartość zostają usunięte.

# Drop every row whose res_name is one of the unwanted values listed below.
# NOTE(review): "H20" is spelled with the digit zero, exactly as in the
# original list -- confirm it is not meant to be "H2O".
cleaned_data <- filter(
  all_data,
  !(res_name %in% c(
    "UNK", "UNX", "UNL", "DUM", "N", "BLOB",
    "ALA", "ARG", "ASN", "ASP", "CYS", "GLN", "GLU", "GLY", "HIS", "ILE",
    "LEU", "LYS", "MET", "MSE", "PHE", "PRO", "SEC", "SER", "THR", "TRP",
    "TYR", "VAL",
    "DA", "DG", "DT", "DC", "DU",
    "A", "G", "T", "C", "U",
    "HOH", "H20", "WAT"
  ))
)

W zbiorze danych występuje kolumna, która równa jest NA we wszystkich wierszach.

# Keep only the rows in which weight_col is present, then report how many
# such rows exist.
rows_without_na_in_weight_co <- cleaned_data %>%
  filter(!is.na(weight_col))
nrow(rows_without_na_in_weight_co)
## [1] 0

Jak widać, nie ma wierszy, w których kolumna weight_col ma wartość niepustą, dlatego możemy ją wykluczyć z dalszej analizy.

# Remove the weight_col column from the cleaned data.
cleaned_data_without_empty_col <- cleaned_data %>%
  select(-weight_col)

Rozmiar danych oraz ich statystyki

Zebrane dane zawierają 408 kolumn oraz 1000 wierszy. Dane są typu character, integer oraz numeric. Większość kolumn jest numeryczna. Ich podstawowe statystyki prezentują się następująco:

Natomiast pozostałe kolumny są następujące

# Summarise the character columns only and render the summary as a table.
cleaned_data_without_empty_col %>%
  select_if(is.character) %>%
  summary() %>%
  knitr::kable()
pdb_code res_name chain_id skeleton_data fo_col fc_col
Length:1000 Length:1000 Length:1000 Length:1000 Length:1000 Length:1000
Class :character Class :character Class :character Class :character Class :character Class :character
Mode :character Mode :character Mode :character Mode :character Mode :character Mode :character

Analiza

Ograniczenie danych

Naszą analizę ograniczymy do 50 najpopularniejszych wartości kolumny res_name

# Count how often each res_name occurs and keep the 50 most frequent values.
# The counts are taken from cleaned_data_without_empty_col so that the
# weight_col column removed earlier does not re-enter the analysis.
# count(sort = TRUE) orders by descending n and returns an ungrouped result.
top_50_res_name <- cleaned_data_without_empty_col %>%
  count(res_name, sort = TRUE) %>%
  head(50)

# Restrict the data to rows whose res_name is among those 50 values.
data_with_most_common_res_names <- cleaned_data_without_empty_col %>%
  filter(res_name %in% top_50_res_name$res_name)

Rozkład jej wartości prezentuje się następująco

Korelacja

W celu sprawdzenia korelacji użyjemy korelacji Rho Spearmana, ponieważ rozkład wartości przynajmniej jednej kolumny nie jest rozkładem normalnym

# Spearman correlation between all numeric columns, computed on rows without
# missing values, then unrolled from a matrix into a long data frame with one
# row per pair of columns (Var1, Var2, Freq).
correlation <- data_with_most_common_res_names %>%
  select_if(is.numeric) %>%
  cor(use = "complete.obs", method = "spearman") %>%
  as.table() %>%
  as.data.frame()
## Warning in cor(data_with_most_common_res_names %>% select_if(is.numeric), :
## odchylenie standardowe wynosi zero

Usuniemy teraz korelacje kolumn samych ze sobą

# Give the long-format columns descriptive names ...
correlation <- rename(
  correlation,
  first_column = Var1,
  second_column = Var2,
  freq = Freq
)
# ... and drop the pairs in which a column is compared with itself.
correlation <- filter(correlation, first_column != second_column)

Grupujemy po pierwszej kolumnie oraz dla każdej jej wartości obliczamy maksymalną wartość korelacji. Następnie wyznaczamy 10 kolumn z największą korelacją oraz filtrujemy dane do wizualizacji.

# Largest (signed) coefficient observed for each column, ignoring NA
# coefficients.
max_per_column <- summarise(
  group_by(correlation, first_column),
  max = max(freq, na.rm = TRUE)
)

# The ten columns with the largest maxima.
top_correlated <- head(arrange(max_per_column, desc(max)), 10)

# Keep only the pairs in which both columns belong to that top ten.
correlation <- correlation %>%
  filter(
    first_column %in% top_correlated$first_column,
    second_column %in% top_correlated$first_column
  )

Rozkład wartości atomów oraz elektronów

Niezgodność liczby atomów

Niezgodność liczby elektronów

Rozkład wartości kolumn rozpoczynających się od “part_01”

# Take every column whose name contains "part_01" and reshape it to long
# format: one row per (column name, value) pair.
part_01_all <- data_with_most_common_res_names %>%
  select(contains("part_01")) %>%
  gather("key", "value")

# Each page shows a 3 x 3 grid, i.e. nine columns per page.
n_pages <- ceiling(length(unique(part_01_all$key)) / 9)

# Mean of every column. Missing observations are skipped: without
# na.rm = TRUE a single NA makes the whole column mean NA, and ggplot then
# drops that column's mean line and label from the plot.
continuous_means <- part_01_all %>%
  group_by(key) %>%
  summarise(mean_value = mean(value, na.rm = TRUE))

# Draw one page of density plots at a time, marking each column's mean with a
# dashed vertical line and a text label.
for (i in seq_len(n_pages)) {
  print(
    ggplot(part_01_all, aes(value, fill = 1)) +
      geom_density(show.legend = FALSE) +
      geom_vline(
        data = continuous_means,
        aes(xintercept = mean_value),
        linetype = "dashed"
      ) +
      geom_text(
        data = continuous_means,
        aes(label = mean_value, y = 1, x = mean_value)
      ) +
      facet_wrap_paginate(~ key, ncol = 3, nrow = 3, scales = "free", page = i) +
      theme_minimal()
  )
}
## Warning: Removed 1152 rows containing non-finite values (stat_density).
## Warning: Removed 96 rows containing missing values (geom_vline).
## Warning: Removed 96 rows containing missing values (geom_text).

## Warning: Removed 1152 rows containing non-finite values (stat_density).
## Warning: Removed 96 rows containing missing values (geom_vline).
## Warning: Removed 96 rows containing missing values (geom_text).

## Warning: Removed 1152 rows containing non-finite values (stat_density).
## Warning: Removed 96 rows containing missing values (geom_vline).
## Warning: Removed 96 rows containing missing values (geom_text).

## Warning: Removed 1152 rows containing non-finite values (stat_density).
## Warning: Removed 96 rows containing missing values (geom_vline).
## Warning: Removed 96 rows containing missing values (geom_text).

## Warning: Removed 1152 rows containing non-finite values (stat_density).
## Warning: Removed 96 rows containing missing values (geom_vline).
## Warning: Removed 96 rows containing missing values (geom_text).

## Warning: Removed 1152 rows containing non-finite values (stat_density).
## Warning: Removed 96 rows containing missing values (geom_vline).
## Warning: Removed 96 rows containing missing values (geom_text).

## Warning: Removed 1152 rows containing non-finite values (stat_density).
## Warning: Removed 96 rows containing missing values (geom_vline).
## Warning: Removed 96 rows containing missing values (geom_text).

## Warning: Removed 1152 rows containing non-finite values (stat_density).
## Warning: Removed 96 rows containing missing values (geom_vline).
## Warning: Removed 96 rows containing missing values (geom_text).

## Warning: Removed 1152 rows containing non-finite values (stat_density).
## Warning: Removed 96 rows containing missing values (geom_vline).
## Warning: Removed 96 rows containing missing values (geom_text).

## Warning: Removed 1152 rows containing non-finite values (stat_density).
## Warning: Removed 96 rows containing missing values (geom_vline).
## Warning: Removed 96 rows containing missing values (geom_text).

## Warning: Removed 1152 rows containing non-finite values (stat_density).
## Warning: Removed 96 rows containing missing values (geom_vline).
## Warning: Removed 96 rows containing missing values (geom_text).

## Warning: Removed 1152 rows containing non-finite values (stat_density).
## Warning: Removed 96 rows containing missing values (geom_vline).
## Warning: Removed 96 rows containing missing values (geom_text).

Animacja

# Build the long-format data for the animation:
#   1. keep res_name plus the shape/density segment counts of parts 00-02,
#   2. stack the six count columns into key/value pairs,
#   3. split the key on its separators into name, part_number and abc (the
#      remaining pieces of the key are discarded by separate()),
#   4. convert part_number to an integer and shift it by one.
animation_data <- data_with_most_common_res_names %>%
  select(
    res_name,
    part_00_shape_segments_count, part_00_density_segments_count,
    part_01_shape_segments_count, part_01_density_segments_count,
    part_02_shape_segments_count, part_02_density_segments_count
  ) %>%
  gather("key", "value", -res_name) %>%
  separate(key, into = c("name", "part_number", "abc")) %>%
  mutate(part_number = as.integer(part_number) + 1)
## Warning: Expected 3 pieces. Additional pieces discarded in 4920 rows [1, 2,
## 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
# Jittered scatter plot of value against abc, coloured by res_name and
# animated over part_number; the title shows the rounded frame time.
p <- ggplot(animation_data, aes(abc, value, colour = res_name)) +
  geom_point(position = "jitter") +
  transition_time(part_number) +
  theme_bw() +
  labs(title = "{round(frame_time)}")

# Render the animation as a GIF that plays once. FALSE is spelled out because
# the shorthand F is an ordinary variable that can be reassigned.
animate(
  p,
  nframes = 150,
  fps = 30,
  width = 600,
  height = 600,
  renderer = gifski_renderer(loop = FALSE)
)

Regresja

Wyznaczanie liczby elektronów

# Spearman correlation matrix of all numeric columns, computed on the rows
# that have no missing values.
correlation <- data_with_most_common_res_names %>%
  select_if(is.numeric) %>%
  cor(use = "complete.obs", method = "spearman")
## Warning in cor(data_with_most_common_res_names %>% select_if(is.numeric), :
## odchylenie standardowe wynosi zero
# Turn the correlation matrix into a data frame and display it.
correlation <- as.data.frame(correlation)

correlation
# Modelling data: the column range from local_res_atom_non_h_electron_sum
# through local_res_atom_S_count.
electron_count_predict_data <- select(
  data_with_most_common_res_names,
  local_res_atom_non_h_electron_sum:local_res_atom_S_count
)

electron_count_predict_data
# Row indices of the training part (70% of the rows), drawn with respect to
# the outcome column. list = FALSE returns a matrix rather than a list; FALSE
# is spelled out because the shorthand F can be reassigned.
indexes <- createDataPartition(
  electron_count_predict_data$local_res_atom_non_h_electron_sum,
  p = 0.7,
  list = FALSE
)

indexes
##        Resample1
##   [1,]         1
##   [2,]         2
##   [3,]         3
##   [4,]         6
##   [5,]         7
##   [6,]         8
##   [7,]        10
##   [8,]        11
##   [9,]        14
##  [10,]        17
##  [11,]        18
##  [12,]        19
##  [13,]        20
##  [14,]        21
##  [15,]        23
##  [16,]        26
##  [17,]        28
##  [18,]        29
##  [19,]        31
##  [20,]        32
##  [21,]        33
##  [22,]        34
##  [23,]        35
##  [24,]        36
##  [25,]        37
##  [26,]        39
##  [27,]        40
##  [28,]        41
##  [29,]        42
##  [30,]        43
##  [31,]        45
##  [32,]        47
##  [33,]        48
##  [34,]        49
##  [35,]        50
##  [36,]        51
##  [37,]        52
##  [38,]        53
##  [39,]        54
##  [40,]        56
##  [41,]        58
##  [42,]        59
##  [43,]        64
##  [44,]        66
##  [45,]        68
##  [46,]        72
##  [47,]        74
##  [48,]        75
##  [49,]        76
##  [50,]        77
##  [51,]        79
##  [52,]        80
##  [53,]        81
##  [54,]        85
##  [55,]        86
##  [56,]        88
##  [57,]        90
##  [58,]        91
##  [59,]        94
##  [60,]        95
##  [61,]        97
##  [62,]        99
##  [63,]       100
##  [64,]       101
##  [65,]       104
##  [66,]       105
##  [67,]       106
##  [68,]       107
##  [69,]       109
##  [70,]       110
##  [71,]       112
##  [72,]       113
##  [73,]       115
##  [74,]       116
##  [75,]       117
##  [76,]       118
##  [77,]       120
##  [78,]       121
##  [79,]       122
##  [80,]       124
##  [81,]       125
##  [82,]       127
##  [83,]       128
##  [84,]       129
##  [85,]       130
##  [86,]       131
##  [87,]       132
##  [88,]       134
##  [89,]       135
##  [90,]       136
##  [91,]       137
##  [92,]       138
##  [93,]       140
##  [94,]       141
##  [95,]       143
##  [96,]       145
##  [97,]       146
##  [98,]       147
##  [99,]       148
## [100,]       149
## [101,]       150
## [102,]       151
## [103,]       152
## [104,]       154
## [105,]       155
## [106,]       156
## [107,]       157
## [108,]       161
## [109,]       163
## [110,]       164
## [111,]       165
## [112,]       167
## [113,]       168
## [114,]       169
## [115,]       171
## [116,]       172
## [117,]       173
## [118,]       174
## [119,]       175
## [120,]       177
## [121,]       178
## [122,]       179
## [123,]       180
## [124,]       181
## [125,]       183
## [126,]       184
## [127,]       185
## [128,]       187
## [129,]       188
## [130,]       190
## [131,]       191
## [132,]       192
## [133,]       193
## [134,]       194
## [135,]       195
## [136,]       197
## [137,]       198
## [138,]       200
## [139,]       201
## [140,]       203
## [141,]       204
## [142,]       205
## [143,]       206
## [144,]       207
## [145,]       209
## [146,]       210
## [147,]       211
## [148,]       212
## [149,]       213
## [150,]       214
## [151,]       215
## [152,]       216
## [153,]       217
## [154,]       220
## [155,]       221
## [156,]       222
## [157,]       225
## [158,]       227
## [159,]       228
## [160,]       229
## [161,]       230
## [162,]       231
## [163,]       232
## [164,]       233
## [165,]       234
## [166,]       235
## [167,]       236
## [168,]       239
## [169,]       240
## [170,]       243
## [171,]       244
## [172,]       246
## [173,]       247
## [174,]       249
## [175,]       252
## [176,]       253
## [177,]       254
## [178,]       255
## [179,]       256
## [180,]       257
## [181,]       261
## [182,]       263
## [183,]       264
## [184,]       265
## [185,]       267
## [186,]       268
## [187,]       269
## [188,]       270
## [189,]       272
## [190,]       273
## [191,]       274
## [192,]       277
## [193,]       278
## [194,]       281
## [195,]       283
## [196,]       284
## [197,]       285
## [198,]       287
## [199,]       288
## [200,]       289
## [201,]       292
## [202,]       293
## [203,]       297
## [204,]       298
## [205,]       301
## [206,]       302
## [207,]       303
## [208,]       304
## [209,]       305
## [210,]       306
## [211,]       307
## [212,]       308
## [213,]       309
## [214,]       310
## [215,]       311
## [216,]       314
## [217,]       316
## [218,]       318
## [219,]       321
## [220,]       322
## [221,]       323
## [222,]       324
## [223,]       325
## [224,]       326
## [225,]       328
## [226,]       329
## [227,]       331
## [228,]       332
## [229,]       333
## [230,]       334
## [231,]       335
## [232,]       336
## [233,]       338
## [234,]       339
## [235,]       340
## [236,]       341
## [237,]       342
## [238,]       343
## [239,]       345
## [240,]       347
## [241,]       348
## [242,]       349
## [243,]       350
## [244,]       351
## [245,]       352
## [246,]       353
## [247,]       354
## [248,]       357
## [249,]       359
## [250,]       360
## [251,]       362
## [252,]       363
## [253,]       364
## [254,]       365
## [255,]       366
## [256,]       367
## [257,]       369
## [258,]       370
## [259,]       371
## [260,]       372
## [261,]       373
## [262,]       375
## [263,]       377
## [264,]       378
## [265,]       379
## [266,]       380
## [267,]       381
## [268,]       382
## [269,]       383
## [270,]       384
## [271,]       385
## [272,]       386
## [273,]       387
## [274,]       388
## [275,]       389
## [276,]       391
## [277,]       392
## [278,]       396
## [279,]       397
## [280,]       398
## [281,]       399
## [282,]       400
## [283,]       401
## [284,]       402
## [285,]       403
## [286,]       404
## [287,]       405
## [288,]       406
## [289,]       407
## [290,]       409
## [291,]       411
## [292,]       412
## [293,]       413
## [294,]       414
## [295,]       415
## [296,]       416
## [297,]       418
## [298,]       419
## [299,]       422
## [300,]       423
## [301,]       424
## [302,]       425
## [303,]       426
## [304,]       427
## [305,]       428
## [306,]       429
## [307,]       430
## [308,]       433
## [309,]       435
## [310,]       436
## [311,]       437
## [312,]       439
## [313,]       440
## [314,]       441
## [315,]       443
## [316,]       444
## [317,]       445
## [318,]       446
## [319,]       448
## [320,]       450
## [321,]       451
## [322,]       452
## [323,]       453
## [324,]       455
## [325,]       457
## [326,]       460
## [327,]       463
## [328,]       464
## [329,]       465
## [330,]       468
## [331,]       469
## [332,]       470
## [333,]       472
## [334,]       473
## [335,]       474
## [336,]       475
## [337,]       477
## [338,]       479
## [339,]       480
## [340,]       482
## [341,]       483
## [342,]       484
## [343,]       485
## [344,]       486
## [345,]       487
## [346,]       488
## [347,]       489
## [348,]       490
## [349,]       491
## [350,]       492
## [351,]       493
## [352,]       494
## [353,]       497
## [354,]       498
## [355,]       499
## [356,]       500
## [357,]       501
## [358,]       505
## [359,]       508
## [360,]       509
## [361,]       510
## [362,]       511
## [363,]       512
## [364,]       514
## [365,]       515
## [366,]       516
## [367,]       517
## [368,]       518
## [369,]       519
## [370,]       520
## [371,]       522
## [372,]       523
## [373,]       524
## [374,]       525
## [375,]       527
## [376,]       528
## [377,]       529
## [378,]       531
## [379,]       533
## [380,]       534
## [381,]       535
## [382,]       536
## [383,]       537
## [384,]       538
## [385,]       539
## [386,]       540
## [387,]       541
## [388,]       543
## [389,]       544
## [390,]       545
## [391,]       548
## [392,]       551
## [393,]       553
## [394,]       554
## [395,]       557
## [396,]       558
## [397,]       559
## [398,]       560
## [399,]       561
## [400,]       564
## [401,]       567
## [402,]       568
## [403,]       569
## [404,]       570
## [405,]       575
## [406,]       576
## [407,]       578
## [408,]       580
## [409,]       581
## [410,]       582
## [411,]       584
## [412,]       585
## [413,]       586
## [414,]       588
## [415,]       589
## [416,]       590
## [417,]       591
## [418,]       593
## [419,]       595
## [420,]       597
## [421,]       598
## [422,]       599
## [423,]       601
## [424,]       602
## [425,]       603
## [426,]       604
## [427,]       607
## [428,]       608
## [429,]       609
## [430,]       610
## [431,]       611
## [432,]       613
## [433,]       614
## [434,]       615
## [435,]       616
## [436,]       617
## [437,]       618
## [438,]       619
## [439,]       621
## [440,]       622
## [441,]       623
## [442,]       624
## [443,]       625
## [444,]       626
## [445,]       627
## [446,]       629
## [447,]       630
## [448,]       632
## [449,]       633
## [450,]       634
## [451,]       636
## [452,]       638
## [453,]       639
## [454,]       642
## [455,]       643
## [456,]       645
## [457,]       647
## [458,]       648
## [459,]       649
## [460,]       650
## [461,]       651
## [462,]       652
## [463,]       653
## [464,]       654
## [465,]       655
## [466,]       656
## [467,]       658
## [468,]       659
## [469,]       660
## [470,]       661
## [471,]       662
## [472,]       663
## [473,]       665
## [474,]       666
## [475,]       667
## [476,]       668
## [477,]       670
## [478,]       671
## [479,]       675
## [480,]       676
## [481,]       678
## [482,]       679
## [483,]       680
## [484,]       681
## [485,]       683
## [486,]       687
## [487,]       688
## [488,]       689
## [489,]       690
## [490,]       691
## [491,]       693
## [492,]       695
## [493,]       698
## [494,]       699
## [495,]       700
## [496,]       703
## [497,]       704
## [498,]       705
## [499,]       706
## [500,]       707
## [501,]       708
## [502,]       712
## [503,]       714
## [504,]       716
## [505,]       719
## [506,]       720
## [507,]       721
## [508,]       722
## [509,]       724
## [510,]       726
## [511,]       727
## [512,]       730
## [513,]       733
## [514,]       734
## [515,]       735
## [516,]       737
## [517,]       738
## [518,]       740
## [519,]       741
## [520,]       742
## [521,]       744
## [522,]       745
## [523,]       747
## [524,]       748
## [525,]       749
## [526,]       750
## [527,]       751
## [528,]       752
## [529,]       753
## [530,]       754
## [531,]       755
## [532,]       758
## [533,]       759
## [534,]       761
## [535,]       762
## [536,]       763
## [537,]       764
## [538,]       766
## [539,]       768
## [540,]       769
## [541,]       770
## [542,]       773
## [543,]       774
## [544,]       775
## [545,]       776
## [546,]       777
## [547,]       778
## [548,]       779
## [549,]       781
## [550,]       782
## [551,]       785
## [552,]       786
## [553,]       787
## [554,]       788
## [555,]       789
## [556,]       790
## [557,]       791
## [558,]       792
## [559,]       793
## [560,]       795
## [561,]       797
## [562,]       798
## [563,]       800
## [564,]       802
## [565,]       803
## [566,]       804
## [567,]       805
## [568,]       806
## [569,]       809
## [570,]       810
## [571,]       811
## [572,]       812
## [573,]       813
## [574,]       816
## [575,]       817
## [576,]       819
# Rows selected by the partition form the training set; all remaining rows
# form the test set.
training_data <- electron_count_predict_data[indexes, ]
testing_data <- electron_count_predict_data[-indexes, ]

training_data
testing_data
# Resampling scheme: 2-fold cross-validation repeated 5 times.
ctrl <- trainControl(method = "repeatedcv", number = 2, repeats = 5)

# Linear regression of local_res_atom_non_h_electron_sum on all other columns
# of the training set.
fit <- train(
  local_res_atom_non_h_electron_sum ~ .,
  data = training_data,
  method = "lm",
  trControl = ctrl
)
fit
## Linear Regression 
## 
## 576 samples
##   5 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (2 fold, repeated 5 times) 
## Summary of sample sizes: 288, 288, 289, 287, 288, 288, ... 
## Resampling results:
## 
##   RMSE      Rsquared   MAE     
##   16.70894  0.9717743  11.17189
## 
## Tuning parameter 'intercept' was held constant at a value of TRUE
# Apply the fitted model to the held-out rows and display the predictions.
predicted_values <- predict(object = fit, newdata = testing_data)

predicted_values
##         4         5         9        12        13        15        16 
##  98.21186  98.21186  55.98714  55.98714  55.98714  55.98714  55.98714 
##        22        24        25        27        30        38        44 
##  55.98714  53.05014  53.05014  53.05014  55.98714  50.96603 337.64707 
##        46        55        57        60        61        62        63 
## 150.83809 223.48021  38.20415  38.20415  38.20415 223.48021  18.30966 
##        65        67        69        70        71        73        78 
##  38.20415  38.13889  37.38832  53.09909  15.12790  55.98714  38.20415 
##        82        83        84        87        89        92        93 
##  42.96658  42.96658  42.96658  55.98714  18.79916 200.00072  42.96658 
##        96        98       102       103       108       111       114 
##  55.98714  50.96603 378.13846 105.07187  15.73978 105.07187  18.79916 
##       119       123       126       133       139       142       144 
##  50.96603  38.20415  38.20415 337.64707 290.55235 290.55235 290.55235 
##       153       158       159       160       162       166       170 
## 290.55235 290.55235 290.55235 290.55235 290.55235 290.55235  16.55561 
##       176       182       186       189       196       199       202 
## 290.55235 290.55235 290.55235 290.55235 290.55235 290.55235  88.56163 
##       208       218       219       223       224       226       237 
##  88.56163  88.56163  88.56163  88.56163  88.56163  88.56163 187.94058 
##       238       241       242       245       248       250       251 
## 187.94058 187.94058  50.96603  50.96603  50.96603  50.96603  18.79916 
##       258       259       260       262       266       271       275 
##  17.98332  17.98332  17.98332  14.61801  38.17355  50.96603  55.98714 
##       276       279       280       282       286       290       291 
##  53.53964  53.53964  53.53964  53.53964  53.53964 233.77025 233.77025 
##       294       295       296       299       300       312       313 
## 233.77025 233.77025 233.77025 233.77025 233.77025  15.12790  15.12790 
##       315       317       319       320       327       330       337 
##  15.12790  15.12790  15.12790  14.31207  15.12790  14.31207  50.96603 
##       344       346       355       356       358       361       368 
## 105.07187 105.07187 105.07187 105.07187 105.07187 105.07187  13.93678 
##       374       376       390       393       394       395       408 
##  82.01466  82.01466  16.75957  96.05836  96.05836  96.05836  15.12790 
##       410       417       420       421       431       432       434 
##  56.92121 290.55235 290.55235 290.55235  55.98714  55.98714  38.17355 
##       438       442       447       449       454       456       458 
## 230.28691  42.96658 314.16758  16.14770 378.13846  50.96603  15.73978 
##       459       461       462       466       467       471       476 
##  50.96603  50.96603  55.98714  38.20415  18.79916  38.20415  89.25166 
##       478       481       495       496       502       503       504 
## 186.90827  55.98714  18.79916  18.79916  18.79916  16.75957  16.75957 
##       506       507       513       521       526       530       532 
## 204.00123 204.00123 223.69770  38.20415  16.55561 290.55235 290.55235 
##       542       546       547       549       550       552       555 
## 290.55235 290.55235 290.55235 290.55235 290.55235 105.07187 105.07187 
##       556       562       563       565       566       571       572 
## 105.07187 105.07187 105.07187 105.07187  55.98714  55.98714  55.98714 
##       573       574       577       579       583       587       592 
##  55.98714  55.98714  55.98714  55.98714  55.98714 105.07187  55.98714 
##       594       596       600       605       606       612       620 
##  55.98714  55.98714  96.05836  47.47739  16.55561  16.55561  50.96603 
##       628       631       635       637       640       641       644 
##  17.77937  16.75957  50.96603  50.96603  50.96603  50.96603  16.75957 
##       646       657       664       669       672       673       674 
##  55.98714  94.35531  94.35531 305.11182 241.60232 241.60232 241.60232 
##       677       682       684       685       686       692       694 
## 241.60232 241.60232  16.55561 105.07187 105.07187  56.92121 105.07187 
##       696       697       701       702       709       710       711 
##  56.92121  16.14770 105.07187  56.92121  84.14251  60.32181  66.27698 
##       713       715       717       718       723       725       728 
##  66.27698  16.14770  60.32181  36.50110  16.14770  60.32181  72.23216 
##       729       731       732       736       739       743       746 
##  96.05286  15.98453  16.75957  14.33246  24.10209  24.10209  16.55561 
##       756       757       760       765       767       771       772 
##  48.73880  38.20415 327.12194  76.48979  76.48979  50.96603  14.56498 
##       780       783       784       794       796       799       801 
##  21.49141  17.57541  22.47042 187.45726  20.51241 176.52124 191.63842 
##       807       808       814       815       818       820 
##  18.79916  18.79916  50.96603  50.96603  38.20415  55.98714
# Root mean squared error on the test set. caret expects predictions first and
# observations second, so the arguments are named to match that signature.
RMSE(
  pred = predicted_values,
  obs = testing_data$local_res_atom_non_h_electron_sum
)
## [1] 17.49885
# R squared on the test set. caret expects predictions first and observations
# second, so the arguments are named to match that signature.
R2(
  pred = predicted_values,
  obs = testing_data$local_res_atom_non_h_electron_sum
)
## [1] 0.9691841